#####################################################
########## Volume and Engagement by Source ##########

# Data In: "posts.csv"
          
# Data Out: 
# Figures 2a, 2b
# Figures 3a, 3b, 3c

######################################################

#Load Packages
library(readr)
library(tidyverse)
library(lubridate)


# Set options for plotting 
options(scipen=999999)

# Set working directory to replication folder (Refugee_Info_Replication)

# Open code file from "Refugee_Info_Replication/code/source_analysis.R"

# Locate this script through RStudio and make its grandparent directory (the
# replication folder) the working directory.
# rstudioapi reports "" rather than NULL for an unsaved document and errors
# when RStudio is not running, so both cases are checked explicitly.
script_path <- ""
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
}

if (is.character(script_path) && length(script_path) == 1L &&
    nzchar(script_path)) {
  # <replication folder>/code/<script> -> <replication folder>
  parent_directory <- dirname(dirname(script_path))

  # Set the working directory to the parent directory
  setwd(parent_directory)
} else {
  # The working directory is left untouched; it must already be the
  # replication folder for the relative "data/" and "plots/" paths to resolve.
  message(
    "Script path is not set. ",
    "Ensure your script is saved and you are running RStudio."
  )
}
# Check Working Directory
getwd()

# Read in the posts data (path is relative to the working directory)
posts <- read_csv(file = "data/posts.csv")


############
# FIGURE 2 #
############

# Plot News vs. Unofficial vs. Official Totals
# `count` is a column of ones; summing it within a group counts posts.
# It stays on `posts` because the monthly summary for Figure 2b sums it too.
posts$count <- 1
official_unofficial <- posts %>%
  group_by(post_type) %>%
  summarise(prop = sum(count)) %>%
  na.omit() # drop the row for posts with a missing post_type

# One bar per source type. The legend is hidden, so the x-axis labels are what
# identify the bars. The previous theme(axis.*.x = element_blank()) call was
# removed: it came before a second theme_minimal(), and a complete theme
# replaces every earlier theme setting, so it never affected the saved figure.
ggplot(official_unofficial, aes(x = post_type, y = prop, fill = post_type)) +
  geom_col(position = "dodge", color = "black") +
  labs(y = "Total Volume of Posts \n", x = "Source") +
  scale_fill_grey() +
  theme_minimal(base_size = 22) +
  theme(legend.position = "none")
ggsave("plots/Figure_2a.pdf", width = 11, height = 7)

# Plot News vs. Unofficial vs. Official Over Time
# Monthly number of posts per source type, keeping months from 2013-01 on.
# Relies on the `count` column of ones added to `posts` for Figure 2a.
official_unofficial <- posts %>%
  group_by(month = floor_date(date, unit = "month"), post_type) %>%
  summarise(prop = sum(count)) %>%
  ungroup() %>% # summarise() leaves the result grouped by month
  filter(month >= ymd("2013-01-01"))

# One panel per source type, each with its own y-axis range.
# The former fill = "post_type" was a quoted constant, not a column mapping,
# and neither layer draws a fill here, so it has been removed.
# NOTE(review): color = "black" sits inside aes(), so it is a one-level
# mapping recoloured by scale_color_grey() rather than literal black. It is
# kept unchanged so the saved figure stays the same.
ggplot(
  official_unofficial,
  aes(x = month, y = prop, group = post_type, color = "black")
) +
  geom_smooth(method = loess, span = .05, se = FALSE) +
  geom_point() +
  labs(y = "Monthly Volume of Posts", x = "Date") +
  theme_minimal(base_size = 22) +
  theme(legend.position = "none") +
  facet_wrap(~post_type, scales = "free_y") +
  scale_color_grey()
ggsave("plots/Figure_2b.pdf", width = 11, height = 7)

############
# FIGURE 3a #
############

# Calculate mean engagement per post by source
# total       = summed engagement_fb within each post_type
# denominator = number of posts within each post_type
# Counting with n() replaces the per-label nrow(subset(...)) lookups, whose
# fallback of nrow(posts) gave a wrong denominator to any post_type other
# than "official", "unofficial" or "news" (including missing values).
# sum() has no na.rm, so a missing engagement_fb makes that group's mean NA.
mean_engagement <- posts %>%
  group_by(post_type) %>%
  summarise(total = sum(engagement_fb), denominator = n()) %>%
  mutate(mean_engagement = total / denominator)

# One bar per source type. The legend is hidden, so the x-axis labels are what
# identify the bars. The previous theme(axis.*.x = element_blank()) call was
# removed: it came before a second theme_minimal(), and a complete theme
# replaces every earlier theme setting, so it never affected the saved figure.
ggplot(
  mean_engagement,
  aes(x = post_type, y = mean_engagement, fill = post_type)
) +
  geom_col(position = "dodge", color = "black") +
  labs(y = "Mean Engagement per Post \n", x = "Source") +
  scale_fill_grey() +
  theme_minimal(base_size = 22) +
  theme(legend.position = "none")
ggsave("plots/Figure_3a.pdf", width = 11, height = 7)


#############
# FIGURE 3b #
#############

# Mean engagement per post, by month and source type, from 2013-01 on.
# epm = summed engagement_fb in the month, ppm = number of posts in the month.
engagement_time <- posts %>%
  group_by(month = floor_date(date, unit = "month"), post_type) %>%
  summarise(epm = sum(engagement_fb), ppm = n()) %>%
  ungroup() %>% # summarise() leaves the result grouped by month
  filter(month >= ymd("2013-01-01")) %>%
  mutate(mean_engagement = epm / ppm)

# One panel per source type on a shared y-axis.
# The y-axis label previously read "Per Post Post"; the duplicate is removed.
# NOTE(review): color = "black" sits inside aes(), so it is a one-level
# mapping recoloured by scale_color_grey() rather than literal black. It is
# kept unchanged so the point and line colours stay the same.
ggplot(engagement_time, aes(x = month, y = mean_engagement, color = "black")) +
  geom_smooth(method = loess, span = .05, se = FALSE) +
  geom_point() +
  labs(y = "Mean Monthly Engagement Per Post", x = "Date") +
  theme_minimal(base_size = 22) +
  facet_wrap(~post_type, scales = "fixed") +
  theme(legend.position = "none") +
  scale_color_grey()
ggsave("plots/Figure_3b.pdf", width = 11, height = 7)

#############
# FIGURE 3c #
#############

# Subset Data to Official Posts Only
# (`official` is also used by the over-time plot for Figure 3c.)
official <- subset(posts, post_type == "official")

# Engagement with Official Sources: Pre-2016 Sources vs. Post-2016 Sources
# total       = summed engagement_fb within each pre_2016_source value
# denominator = number of official posts with that value
# Counting with n() replaces lookups keyed on the literal labels
# "pre-2016 source" / "post-2016 source", which left the denominator NA for
# any value that did not match one of those strings exactly.
mean_engagement <- official %>%
  group_by(pre_2016_source) %>%
  summarise(total = sum(engagement_fb), denominator = n()) %>%
  mutate(mean_engagement = total / denominator)

# One bar per pre_2016_source value; the x-axis labels identify the bars.
ggplot(
  mean_engagement,
  aes(x = pre_2016_source, y = mean_engagement, fill = pre_2016_source)
) +
  geom_col(position = "dodge", color = "black") +
  theme_minimal(base_size = 22) +
  labs(y = "Mean Engagement per Post \n", x = "Source") +
  scale_fill_grey() +
  theme(legend.position = "none")
ggsave("plots/Figure_3c_left.pdf", width = 11, height = 7)


# Engagement with Official Sources that were active pre/post 2016 Over Time
# Uses `official`, the official-posts subset created for the Figure 3c bars.
# epm = summed engagement_fb in the month, ppm = number of posts in the month.
engagement_time <- official %>%
  group_by(month = floor_date(date, unit = "month"), pre_2016_source) %>%
  summarise(epm = sum(engagement_fb), ppm = n()) %>%
  ungroup() %>% # summarise() leaves the result grouped by month
  filter(month >= ymd("2013-01-01")) %>%
  mutate(mean_engagement = epm / ppm)

# One panel per pre_2016_source value on a shared y-axis.
# The y-axis label previously read "mean Monthly Engagement Per Post Post";
# capitalisation and the duplicated word are corrected.
# NOTE(review): color = "black" sits inside aes(), so it is a one-level
# mapping recoloured by scale_color_grey() rather than literal black. It is
# kept unchanged so the point and line colours stay the same.
ggplot(engagement_time, aes(x = month, y = mean_engagement, color = "black")) +
  geom_smooth(method = loess, span = .05, se = FALSE) +
  geom_point() +
  labs(y = "Mean Monthly Engagement Per Post", x = "Date") +
  theme_minimal(base_size = 22) +
  facet_wrap(~pre_2016_source, scales = "fixed") +
  theme(legend.position = "none") +
  scale_color_grey()
ggsave("plots/Figure_3c_right.pdf", width = 11, height = 7)




